// Housekeeping: close any open log (capture ignores the error when none is open), set -more-, and clear memory //
cap log close
set more 1
clear



// March 2023, Alm, Leguizamon^2 (Marriage Tax and Race) //
// Prepare the datasets to find families //
// After 1995 we can start identifying cohabiting couples (these will be dropped from the main analysis later) //

// Remember to set the right directory path and the appropriate folders // 

// Use end-of-line as the command delimiter //
#d cr 

/* Load the household-level stripped file */
// Households whose members are single, married or cohabiting //
// Source: IPUMS CPS extracts, put into Stata format with the Stata read-in files //

use "dta\rawdata_ipumscps\last_master92.dta", clear

// Keep persons aged 16 and over (younger records may be children answering the questionnaire) //
keep if age>=16

// Household identifier built from survey year and household serial number //
gen double uniqhh=year*10000000 + serial*100

// nondup = number of OTHER records sharing the same household id (0 means the record is alone) //
duplicates tag uniqhh, gen(nondup)

// Drop, in a single pass:
//   - records that are alone in their household (no partner, or married spouse not present)
//   - in households with three or more records: anyone under 18 (may be a foster child), and anyone with
//     relate>203 or marst>2 (keeps the clearly married; removes unmarried partners and heads with multiple unmarried partners)
drop if nondup==0 | (nondup>1 & (age<18 | relate>203 | marst>2))

drop nondup


// Spouse location: the CPS records it for most couples but not all; fill it in where it is absent (sploc==0) //
// Among the sploc==0 rows of a household, a row is pointed at the lowest such pernum when that is below
// its own pernum, otherwise at the highest such pernum when that is above its own pernum //
// NOTE(review): a row that is the only sploc==0 row in its household matches neither case and ends up with sploc missing (not 0) //
bysort uniqhh: egen hi_pernum=max(pernum) if sploc==0
bysort uniqhh: egen lo_pernum=min(pernum) if sploc==0
replace sploc=cond(lo_pernum<pernum, lo_pernum, cond(hi_pernum>pernum, hi_pernum, .)) if sploc==0
drop hi_pernum lo_pernum



// famid1: family number appended to the household id below; it is created as 1 on every row //
// NOTE(review): the two replaces that follow assign 1 to a variable that is already 1 everywhere, so they change nothing;
// confirm whether famid1 was meant to start from famid instead //
gen famid1=1
replace famid1=1 if famid==2 & relate==101 // Head of household classified as secondary family // 
replace famid1=1 if famid>1 & (relate>=1114 & relate<=1117) // Make the rest of unmarried partners the same family //

// Discard the household-level uniqhh so it can be rebuilt with the family number appended //
drop uniqhh

// create a unique family id //
gen double uniqhh=(year*10000000)+(serial*100)+famid1


/* Race of the partner/spouse, stored in r_partner on both rows of the couple */
// Race of the non-head members (0 on the head row), spread over the family with max() //
gen race_partner_nh_tmp=cond(relate==101, 0, race)
bysort uniqhh: egen r_spouse_nh=max(race_partner_nh_tmp)
// Race of the head (0 on every other row), spread over the family with max() //
gen race_partner_hd_tmp=cond(relate==101, race, 0)
bysort uniqhh: egen r_spouse_hd=max(race_partner_hd_tmp)
// The head receives the non-head race; everyone else receives the head's race //
gen r_partner=cond(relate==101, r_spouse_nh, r_spouse_hd)

// Recode race for the main analysis: both partners report the same, single race //
// race1: 1 = both White (100), 2 = both Black (200), 3 = both report the same other-race code from the list below //
gen same_race=cond(race==100, 1, cond(race==200, 2, ///
	cond(inlist(race, 300, 650, 651, 652, 700, 808, 809, 815, 820, 830), 3, .))) if race==r_partner
gen race1=same_race


/* Later samples started using bi-racial classifications: include these for robustness (race2) */
gen race2_temp=race1

// Same multi-race code on both partners: the part-Black codes go to 2, the White/other (non-Black) codes go to 3 //
replace race2_temp=2 if race==r_partner & race2_temp==. & inlist(race, 801, 805, 806, 807)
replace race2_temp=3 if race==r_partner & race2_temp==. & inlist(race, 802, 803, 804, 819)

/* Different codes, but both partners report Black or part Black */
replace race2_temp=2 if race!=r_partner & race2_temp==. ///
	& inlist(race, 200, 801, 805, 806, 807) & inlist(r_partner, 200, 801, 805, 806, 807)

/* White comparison group: one partner is White (100) and the other reports White plus other races */
replace race2_temp=1 if race!=r_partner & race2_temp==. ///
	& ((race==100 & inlist(r_partner, 801, 802, 803, 804, 810, 811, 812, 813, 814, 816, 817)) ///
	| (inlist(race, 801, 802, 803, 804, 810, 811, 812, 813, 814, 816, 817) & r_partner==100))

/* Different codes, but both partners report part or full other (non-Black) race */
// One membership test per partner replaces the original one-statement-per-code ladder; race!=r_partner removes the same-code pairs //
replace race2_temp=3 if race!=r_partner & race2_temp==. ///
	& inlist(race, 300, 650, 651, 652, 700, 802, 803, 804, 808, 809, 815, 819, 820, 830) ///
	& inlist(r_partner, 300, 650, 651, 652, 700, 802, 803, 804, 808, 809, 815, 819, 820, 830)

gen race2=race2_temp


/* Mixed couples: exactly one partner is Black (200) and the other is White or a single other race */
gen mx_black=(race==200 & inlist(r_partner, 100, 300, 650, 651, 652, 700)) ///
	| (inlist(race, 100, 300, 650, 651, 652, 700) & r_partner==200)

// Mixed couples: one partner is White (100) and the other is a single other race //
gen mx_oth=(race==100 & inlist(r_partner, 300, 650, 651, 652, 700)) ///
	| (inlist(race, 300, 650, 651, 652, 700) & r_partner==100)

/* race3: race2 where available; otherwise 2 when one partner is Black, and 3 for every remaining couple */
// (mx_oth couples and all still-unclassified couples both end up as 3) //
gen race3=race2
replace race3=cond(mx_black==1, 2, 3) if race3==.


// Hispanic couples vs. non-Hispanic couples //
// Person-level flag: 0 when hispan==0, missing when hispan is 901 or 902, 1 for every other value //
gen hispanic=(hispan!=0) if !inlist(hispan, 901, 902)

// Flag carried by non-head rows / by the head row; rows of the other kind, and rows with a missing flag, contribute 0 //
gen hisp_sp_nh=cond(relate!=101 & hispanic<., hispanic, 0)
gen hisp_sp_hd=cond(relate==101 & hispanic<., hispanic, 0)

// Spread both flags across the family //
bysort uniqhh: egen his_spouse_nh=max(hisp_sp_nh)
bysort uniqhh: egen his_spouse_hd=max(hisp_sp_hd)

// Couple-level code: 1 = both Hispanic, 0 = neither, 2 = exactly one of the two //
gen hisp=cond(his_spouse_nh==1 & his_spouse_hd==1, 1, ///
	cond(his_spouse_nh==0 & his_spouse_hd==0, 0, ///
	cond((his_spouse_nh==1 & his_spouse_hd==0) | (his_spouse_nh==0 & his_spouse_hd==1), 2, .)))



// Household identifier (survey year and household serial) //
gen double uniqhhld=year*1000000 + serial*10

// Original-family identifier, kept for merging //
gen double uniqfm=year*10000000 + serial*100 + famid

// Person identifier, used to merge with the all-persons data //
gen double uniqid=year*10000000 + serial*100 + pernum


// Cohabiting (1) vs. married (0): every marst value above 1 counts as cohabiting //
gen cohab=marst>1


// Redefine head of family //
// Either married (marst==1) and reference person of the family (famrel==1),
// or the head of household (relate==101) in a cohabiting relationship //
gen head=(marst==1 & famrel==1) | (relate==101 & cohab==1)

// Redefine spouse //
// Either married and recorded as the spouse (famrel==2), or a non-head in a cohabiting relationship (unmarried partner) //
gen spouse=(marst==1 & famrel==2) | (relate!=101 & cohab==1)

// Check that everyone is categorized as head or spouse //
gen check=head + spouse
sum check

// Same-sex couples (probably under-represented: identified only through the sex of the unmarried partner) //
// ss = number of other records in the same family that share this record's sex //
duplicates tag uniqhh sex, generate(ss)


// Father pointer for the family //
// Same-sex couples (ss==1): the head's person number is used as father and the spouse's as mother.
// nchild>=0 stands in for the original pair of identical nchild==0 and nchild>0 branches //
gen father=0
replace father=pernum if head==1 & ss==1 & nchild>=0
replace father=sploc if spouse==1 & ss==1 & nchild>=0
// All other couples: a row with sex==1 points at itself, a row with sex==2 points at its spouse //
replace father=cond(sex==1, pernum, sploc) if (head==1 | spouse==1) & inlist(sex, 1, 2) & ss!=1

// Mother pointer for the family (mirror image of the father rules) //
gen mother=0
replace mother=sploc if head==1 & ss==1 & nchild>=0
replace mother=pernum if spouse==1 & ss==1 & nchild>=0
replace mother=cond(sex==2, pernum, sploc) if (head==1 | spouse==1) & inlist(sex, 1, 2) & ss!=1

// Keep only the relevant variables //
keep year serial uniqhh uniqhhld uniqfm uniqid race1 race2 race3 famid famid1 ///
	pernum cohab head spouse sploc father mother ss hispan hisp


/* Save as a master dataset, to be combined later with the married couples that are not heads of household */
save "dta\rawdata_ipumscps\master1.dta", replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/* Married couples that are not categorized as heads of household (secondary families) */

use "dta\rawdata_ipumscps\last_nh_married92.dta", clear


// Household identifier (survey year and household serial) //
gen double uniqhhld=year*1000000 + serial*10

// Family number used later to build unique family identifiers; starts at 0 //
gen famid1=0

// Spouse location: the CPS records it for most couples but not all; fill it in where it is absent (sploc==0) //
// Grouping by household works because only one couple per household lacks sploc information
// (a few households hold several couples, but only one of them has no sploc) //
// A row is pointed at the lowest sploc==0 pernum when that is below its own, otherwise at the highest when that is above its own //
bysort uniqhhld: egen hi_pernum=max(pernum) if sploc==0
bysort uniqhhld: egen lo_pernum=min(pernum) if sploc==0
replace sploc=cond(lo_pernum<pernum, lo_pernum, cond(hi_pernum>pernum, hi_pernum, .)) if sploc==0
drop hi_pernum lo_pernum

/* Start here to see if we can create the family group by using the pernum and sploc */ 
// This section renumbers famid into famid1 for each household, using a hand-built table of cases //



// nondup = number of other records in the same household (1 = two records, 3 = four records, and so on) //
duplicates tag uniqhhld, gen(nondup)

// Two records in the household: keep the original family number //
replace famid1=famid if nondup==1
// More than two records: family 2 stays family 2 //
replace famid1=2 if famid==2 & nondup>1

// This allows to find out how to switch family ids in sequential order // 
// totfam = sum of famid over all records of the household; together with nondup it selects the case handled below //
// NOTE(review): rows that match none of the cases keep famid1==0 //
bysort uniqhhld: egen totfam=sum(famid)
// Four records in the household (nondup==3) //
replace famid1=3 if famid==3 & totfam==10 & nondup==3
replace famid1=4 if famid==4 & nondup==3 & totfam==12
replace famid1=2 if famid==3 & nondup==3 & totfam==14
replace famid1=3 if famid==4 & nondup==3 & totfam==14
replace famid1=3 if famid==3 & nondup==3 & totfam==16
replace famid1=4 if famid==5 & nondup==3 & totfam==16
// Six records in the household (nondup==5) //
replace famid1=famid if totfam==18 & nondup==5
replace famid1=3 if famid==4 & totfam==22 & nondup==5
replace famid1=4 if famid==5 & totfam==22 & nondup==5
replace famid1=famid if totfam==24 & nondup==5

// Eight records in the household (nondup==7) //
replace famid1=famid if nondup==7 & totfam==28
replace famid1=famid-1 if nondup==7 & totfam==34 & famid>3

drop totfam 


// Unique family identifier (survey year, household serial and renumbered family) //
gen double uniqhh=(year*10000000)+(serial*100)+famid1


/* Generate race of partner/spouse, stored in r_partner on both rows of the couple */
// Race of the non-reference member (famrel!=1; 0 on the reference-person row), spread over the family
// with max() and assigned to the reference person //
gen race_partner=0 if famrel==1
replace race_partner=race if famrel!=1 
bysort uniqhh: egen r_spouse=max(race_partner)
gen r_partner=r_spouse if famrel==1
// Race of the reference person (famrel==1; 0 on every other row), spread over the family and assigned to
// everyone else. Without this step r_partner stays missing on the spouse row, so the spouse gets race1 and
// race2 missing and falls into the catch-all race3==3 whatever the couple's race. The head-of-household
// file earlier in this do-file fills both rows in the same way. //
gen race_head_tmp=0 if famrel!=1
replace race_head_tmp=race if famrel==1
bysort uniqhh: egen r_head=max(race_head_tmp)
replace r_partner=r_head if famrel!=1

// Recode race for the main analysis: both partners report the same, single race //
// race1: 1 = both White (100), 2 = both Black (200), 3 = both report the same other-race code from the list below //
gen same_race=cond(race==100, 1, cond(race==200, 2, ///
	cond(inlist(race, 300, 650, 651, 652, 700, 808, 809, 815, 820, 830), 3, .))) if race==r_partner
gen race1=same_race


/* Later samples started using bi-racial classifications: include these for robustness (race2) */
gen race2_temp=race1

// Same multi-race code on both partners: the part-Black codes go to 2, the White/other (non-Black) codes go to 3 //
replace race2_temp=2 if race==r_partner & race2_temp==. & inlist(race, 801, 805, 806, 807)
replace race2_temp=3 if race==r_partner & race2_temp==. & inlist(race, 802, 803, 804, 819)

/* Different codes, but both partners report Black or part Black */
replace race2_temp=2 if race!=r_partner & race2_temp==. ///
	& inlist(race, 200, 801, 805, 806, 807) & inlist(r_partner, 200, 801, 805, 806, 807)

/* White comparison group: one partner is White (100) and the other reports White plus other races */
replace race2_temp=1 if race!=r_partner & race2_temp==. ///
	& ((race==100 & inlist(r_partner, 801, 802, 803, 804, 810, 811, 812, 813, 814, 816, 817)) ///
	| (inlist(race, 801, 802, 803, 804, 810, 811, 812, 813, 814, 816, 817) & r_partner==100))

/* Different codes, but both partners report part or full other (non-Black) race */
// One membership test per partner replaces the original one-statement-per-code ladder; race!=r_partner removes the same-code pairs //
replace race2_temp=3 if race!=r_partner & race2_temp==. ///
	& inlist(race, 300, 650, 651, 652, 700, 802, 803, 804, 808, 809, 815, 819, 820, 830) ///
	& inlist(r_partner, 300, 650, 651, 652, 700, 802, 803, 804, 808, 809, 815, 819, 820, 830)

gen race2=race2_temp


/* Mixed couples: exactly one partner is Black (200) and the other is White or a single other race */
gen mx_black=(race==200 & inlist(r_partner, 100, 300, 650, 651, 652, 700)) ///
	| (inlist(race, 100, 300, 650, 651, 652, 700) & r_partner==200)

// Mixed couples: one partner is White (100) and the other is a single other race //
gen mx_oth=(race==100 & inlist(r_partner, 300, 650, 651, 652, 700)) ///
	| (inlist(race, 300, 650, 651, 652, 700) & r_partner==100)

/* race3: race2 where available; otherwise 2 when one partner is Black, and 3 for every remaining couple */
// (mx_oth couples and all still-unclassified couples both end up as 3) //
gen race3=race2
replace race3=cond(mx_black==1, 2, 3) if race3==.

// Hispanic couples vs. non-Hispanic couples //
// Person-level flag: 0 when hispan==0, missing when hispan is 901 or 902, 1 for every other value //
gen hispanic=(hispan!=0) if !inlist(hispan, 901, 902)

// Flag carried by the non-reference rows (0 on the famrel==1 row) and by the reference row (0 elsewhere) //
gen hisp_partner=cond(famrel==1, 0, hispanic)
gen hisp_hd=cond(famrel==1, hispanic, 0)

// Spread both flags across the family //
bysort uniqhh: egen his_part=max(hisp_partner)
bysort uniqhh: egen his_hd=max(hisp_hd)

// Couple-level code: 1 = both Hispanic, 0 = neither, 2 = exactly one of the two //
gen hisp=cond(his_part==1 & his_hd==1, 1, ///
	cond(his_part==0 & his_hd==0, 0, ///
	cond((his_part==1 & his_hd==0) | (his_part==0 & his_hd==1), 2, .)))



// Original-family identifier, kept for merging //
gen double uniqfm=year*10000000 + serial*100 + famid


// Person identifier, used to merge with the all-persons data //
gen double uniqid=year*10000000 + serial*100 + pernum

// Cohabiting (1) vs. married (0): every marst value above 1 counts as cohabiting //
gen cohab=marst>1



// Redefine head: married (marst==1) and reference person of the family (famrel==1) //
gen head=marst==1 & famrel==1


// Redefine spouse: married and recorded as the spouse (famrel==2) //
gen spouse=marst==1 & famrel==2

// Check that everyone is categorized as spouse or head //
gen check=head + spouse
sum check

// Same-sex couples (probably under-represented: identified only through the sex of the unmarried partner) //
// ss = number of other records in the same family that share this record's sex //
duplicates tag uniqhh sex, generate(ss)


// Father pointer for the family //
// Same-sex couples (ss==1): the head's person number is used as father and the spouse's as mother.
// nchild>=0 stands in for the original pair of identical nchild==0 and nchild>0 branches //
gen father=0
replace father=pernum if head==1 & ss==1 & nchild>=0
replace father=sploc if spouse==1 & ss==1 & nchild>=0
// All other couples: a row with sex==1 points at itself, a row with sex==2 points at its spouse //
replace father=cond(sex==1, pernum, sploc) if (head==1 | spouse==1) & inlist(sex, 1, 2) & ss!=1

// Mother pointer for the family (mirror image of the father rules) //
gen mother=0
replace mother=sploc if head==1 & ss==1 & nchild>=0
replace mother=pernum if spouse==1 & ss==1 & nchild>=0
replace mother=cond(sex==2, pernum, sploc) if (head==1 | spouse==1) & inlist(sex, 1, 2) & ss!=1



// Keep only the relevant variables //
keep year serial uniqhh uniqhhld uniqfm uniqid race1 race2 race3 famid famid1 ///
	pernum cohab head spouse sploc father mother hisp

/* Save as a second master dataset, to be combined later with the primary families from heads of households */
save "dta\rawdata_ipumscps\subfamilies.dta", replace


*************************************************************************************************

/* Stack the two master datasets
	(does not include subfamily cohabiting couples: only available since 2007) 	*/

clear

// Start from the head-of-household families, then add the subfamilies //
use "dta\rawdata_ipumscps\master1.dta"
append using "dta\rawdata_ipumscps\subfamilies.dta"

// Number of coupled families per household: records in the household divided by two //
bysort uniqhhld: gen famstot=_N/2

// Marks every row of this file as a member of a couple //
gen couples=1





// Set of all identifiable families (married or cohabiting heads of households) //
save "dta\rawdata_ipumscps\allfamilies.dta", replace



*************************************************************************************************************

/* Get the all persons dataset ready for the merge */ 

use "dta\rawdata_ipumscps\last_allprsns92.dta", clear
// Create a unique person identifier for merge with all person data //
gen double uniqid=(year*10000000)+(serial*100)+ pernum



// Merge with the Families Dataset //
// The path is relative to the working directory, like every other path in this do-file. It previously began
// with a backslash, which pointed at the drive root instead of the folder where allfamilies.dta is saved. //
merge 1:1 uniqid using "dta\rawdata_ipumscps\allfamilies.dta"

// Drop the household variable, and recreate it for all persons in the HH //
// (rows that exist only in the persons file have no uniqhhld after the merge) //

drop uniqhhld
gen double uniqhhld=(year*1000000)+(serial*10)

// Highest _merge code in the household: 3 means at least one person in it matched a couple record //
bysort uniqhhld: egen matched=max(_merge)

// Tabulate the race of the [presumably] heads of family that will not be included in the analysis (single/divorced/not cohabiting) //
tab race if matched==1 & famrel==1

// Keep all the people in the households for which we have a primary or secondary family //
// Still need to clean these by families //
keep if matched==3
drop _merge

//Separate the couple from the children/potential dependents
// couples was set to 1 on every row of the families file before it was saved; rows found only in the persons file get 0 here //
replace couples=0 if couples==.

// Identify father/mother for those that come from the individual (not the couples) dataset
// These are the rows with head and spouse both missing; their pointers are taken from poploc and momloc //
replace father=poploc if head==. & spouse==. 
replace mother=momloc if head==. & spouse==. 

// Assign father and mother to foster kids //
// Flag the rows with relate==1242 and spread the flag to every row of their household //
bysort uniqhhld: gen fostrfam=1 if relate==1242
bysort uniqhhld: egen fostrhhld=max(fostrfam)
// In flagged households, read the father/mother pointers off the head-of-household row (relate==101) //
gen hhdhld_fstr=father if fostrhhld==1 & relate==101  // when foster kids are identifiable in the relate variable, they are hosted by the head of household //
gen sphld_fstr=mother if fostrhhld==1 & relate==101
// Spread those pointers across the household, then copy them onto the relate==1242 rows //
bysort uniqhhld: egen fatherfstr=max(hhdhld_fstr) if fostrhhld==1
bysort uniqhhld: egen motherfstr=max(sphld_fstr) if fostrhhld==1
replace father=fatherfstr if relate==1242
replace mother=motherfstr if relate==1242


 
// For simplicity, children or other family members are kept only while they can plausibly be dependents: //
// anyone aged 24 or more, and anyone aged 19-23 with schlcoll==5, is dropped unless flagged as disabled //

// Disability flag: SSI income or disability income that is positive and below 999999 //
// (999999 is presumably the not-in-universe code -- verify against the IPUMS codebook) //
generate disability = (incssi>0 & incssi<999999) | (incdisab>0 & incdisab<999999)

// Condition shared by both drops: not a head, not a spouse, not disabled //
local nondisabled_other "head==. & spouse==. & disability==0"

// Aged 24 or more (they belong to other units of taxation) //
drop if `nondisabled_other' & age>23
// Aged 19-23 with schlcoll==5 (presumably "not attending school" -- verify against the IPUMS codebook) //
drop if `nondisabled_other' & (age>18 & age<24) & schlcoll==5
 
/* A robustness check can be done for years 2004 -2018 which shows who has filed their own tax return and are not dependents 
drop if head==. & spouse==. & (filestat==4 | filestat==5) & depstat==0 */

// Drop records with no parent pointer at all (limits the sample to those we can easily assume are children of our sampled couples) //
drop if mother==0 & father==0 


// Total number of families in the household: spread the largest famstot over all household members (fills in missing values) //
by uniqhhld, sort: egen families = max(famstot)

// True when exactly one of the two parent pointers is zero //
local oneparent "((father==0 & mother!=0) | (father!=0 & mother==0))"

// Drop grandchildren (relate==901) with a single parent pointer in 1-family households //
// (they are typically claimed by their parent, who has already been dropped) //
drop if `oneparent' & relate==901 & families==1


// Flag for records with information on only one parent: //
//   1, 2 or 3 = not disabled, in a household with 1, 2 or 3 families //
//   0         = every other one-parent record (i.e. flagged as disabled) //
generate chk = 1 if `oneparent' & disability==0
replace chk = families if chk==1 & (families==2 | families==3)
replace chk = 0 if `oneparent' & chk==.


// Find the parent for those who only show one parent //
// NOTE(review): the condition uses chk!=. , which also admits chk==0 (the disabled one-parent records), not only the non-disabled ones //
// parent = the one non-zero pointer (the other pointer is 0, so the sum equals the non-zero one) //
gen parent=father+mother if ((father==0 & mother!=0) | (father!=0 & mother==0)) & chk!=. 
// Per household: how many records have a parent pointer, and the sum of those pointers //
bysort uniqhhld: egen cnt_tofindparent=count(parent)
bysort uniqhhld: egen smparent=sum(parent)
// Household mean pointer minus own pointer: zero when the record's pointer equals the household mean //
gen prnt_chk=(smparent/cnt_tofindparent) - parent
drop if prnt_chk!=. & prnt_chk!=0 // Drop individuals within families with different missing parents (there are 462 in the whole sample), but it is too hard to determine if they should be dependents or not.  


// parent_s = largest remaining parent pointer in the household //
// NOTE: cnt_tofindparent was computed before the drop above and is not recomputed here //
bysort uniqhhld: egen parent_s=max(parent) if cnt_tofindparent>0
// Mark the record whose person number equals that pointer, i.e. the parent's own record //
gen findparent=1 if pernum==parent_s
bysort uniqhhld: egen fndparent=max(findparent) // Fill in the flag for those for which the parent is found
// One-parent records in households where the pointed-to parent is not present are dropped //
gen notfndparent=1 if fndparent==. & chk!=.
drop if notfndparent==1


// Complete the father/mother pointers in 1-family households where only one parent was recorded. //
// mthr_flg: mother pointer of records listing only a mother; fthr_flg: father pointer of records listing only a father //
gen mthr_flg=mother if (mother!=0 & father==0)
gen fthr_flg=father if (father!=0 & mother==0)
// Spread the largest of each pointer over the household (only households with a found parent and a single family) //
bysort uniqhhld: egen mflag1=max(mthr_flg) if fndparent==1 & families==1
bysort uniqhhld: egen fflag1=max(fthr_flg) if fndparent==1 & families==1
// On the found parent's own record (findparent==1), fill whichever of fflag1/mflag1 is still missing //
// from that record's own mother/father values. //
// NOTE: these four replaces are order dependent (each one tests values the previous ones may have set) -- do not reorder //
replace fflag1=mother if findparent==1 & fndparent==1 & mflag1!=mother & fflag1==.
replace fflag1=father if findparent==1 & fndparent==1 & fflag1==.
replace mflag1=father if findparent==1 & fndparent==1 & fflag1!=father & mflag1==.
replace mflag1=mother if findparent==1 & fndparent==1 & mflag1==.

// Second pass, still on the found parent's record: keep mflag1/fflag1 when they are crossed relative to the record's //
// own father/mother values (mflag1==father & fflag1==mother); otherwise fall back to the record's own mother/father. //
// NOTE(review): mflag and fflag are not created anywhere in the visible part of this file; presumably they resolve //
// through Stata's variable-name abbreviation to mflag1 and fflag1. Confirm and spell out the full names: abbreviation //
// fails under "set varabbrev off" or when another variable starting with mflag/fflag exists. //
gen mflag2=mflag1 if findparent==1 & fndparent==1 & mflag1==father & fflag1==mother & mflag!=.
gen fflag2=fflag1 if findparent==1 & fndparent==1 & mflag1==father & fflag1==mother & fflag!=.
replace mflag2=mother if findparent==1 & fndparent==1 & mflag1!=. & mflag2==.
replace fflag2=father if findparent==1 & fndparent==1 & fflag1!=. & fflag2==.

// Spread the corrected pointers over the (1-family) household and overwrite mother/father wherever a correction exists //
bysort uniqhhld: egen mthr_correct=max(mflag2) if families==1
bysort uniqhhld: egen fthr_correct=max(fflag2) if families==1
replace mother=mthr_correct if mthr_correct!=.
replace father=fthr_correct if fthr_correct!=.



// There are children left in the data (from households with 2 and 3 families) for which we need to change the data -- I do it manually for ease //
// Each correction below targets specific records by year, serial and pernum and overwrites their father/mother pointers //
// (one household is dropped instead). The "year # serial" in each comment names the household being corrected. //
// 1994 # 59449 -- person 7 Mom is person number 6 (assign to mom's family - father 2 mother 1 )
replace father=2 if year==1994 & serial==59449 & pernum==7
replace mother=1 if year==1994 & serial==59449 & pernum==7



// 2003 # 83941 -- Grandchild (person 6): Mom is person number 3 (assign to mom's family - father 1 mother 2 )
replace father=1 if year==2003 & serial==83941 & pernum==6
replace mother=2 if year==2003 & serial==83941 & pernum==6

// 2004 # 72537 - persons 10 and 11 are children of person 3 (assign to mom's family - father 1 mother 2 )
replace father=1 if year==2004 & serial==72537 & (pernum==10 | pernum==11) 
replace mother=2 if year==2004 & serial==72537 & (pernum==10 | pernum==11)

// 2004 # 80628 - person 2 is person 1's child (assign to parent's family - father 8 mother 7)
replace father=8 if year==2004 & serial==80628 & pernum==2
replace mother=7 if year==2004 & serial==80628 & pernum==2


// 2005 # 79301 - person 5 is person 3's child (assign to parent's family - father 2 mother 1)
replace father=2 if year==2005 & serial==79301 & pernum==5
replace mother=1 if year==2005 & serial==79301 & pernum==5

// 2005 # 81014 - person 2 is person 1's child (assign to parent's family - father 9 mother 8)
replace father=9 if year==2005 & serial==81014 & pernum==2
replace mother=8 if year==2005 & serial==81014 & pernum==2

// 2006 # 96821 - persons 6 and 7 are 5's children (assign to parent's family - father 9 mother 8)
replace father=9 if year==2006 & serial==96821 & (pernum==6 | pernum==7) 
replace mother=8 if year==2006 & serial==96821 & (pernum==6 | pernum==7)


// 2010 # 89432 - person 8 is 3's child (assign to parent's family - father 1 mother 2)
replace father=1 if year==2010 & serial==89432 & pernum==8
replace mother=2 if year==2010 & serial==89432 & pernum==8


// 2011 # 13190 - person 7 is 3's child (assign to parent's family - father 2 mother 1) 
replace father=2 if year==2011 & serial==13190 & pernum==7
replace mother=1 if year==2011 & serial==13190 & pernum==7


// 2011 # 89127 - person 5 is 4's child (assign to parent's family - father 2 mother 1) 
replace father=2 if year==2011 & serial==89127 & pernum==5
replace mother=1 if year==2011 & serial==89127 & pernum==5


// 2012 # 12589 person 8 is 3's child (assign to parent's family - father 2 mother 1) 
replace father=2 if year==2012 & serial==12589 & pernum==8
replace mother=1 if year==2012 & serial==12589 & pernum==8


// 2012 # 14522 person 6 is 5's child (assign to parent's family - father 1 mother 2) 
replace father=1 if year==2012 & serial==14522 & pernum==6
replace mother=2 if year==2012 & serial==14522 & pernum==6


// 2012 # 26604 person 2 is 1's child (unmarried couple - assign same family - father 1 mother 5: same-sex couple - correct the father and mother for the couple as well) 
// NOTE(review): the comment above says "father 1 mother 5", but the code assigns father=5 and mother=1 -- confirm which is intended //
replace father=5 if year==2012 & serial==26604 & (pernum==2 | pernum==1 | pernum==5)
replace mother=1 if year==2012 & serial==26604 & (pernum==2 | pernum==1 | pernum==5)


// 2012 # 83032 person 10 is 9's child (over-age person filing single, but living with parents - assign same family, father 12 mother 11) 
replace father=12 if year==2012 & serial==83032 & pernum==10
replace mother=11 if year==2012 & serial==83032 & pernum==10

// 2012 # 88961 person 5 is 4's child (assign to parent's family, father 2 mother 1)  
replace father=2 if year==2012 & serial==88961 & pernum==5
replace mother=1 if year==2012 & serial==88961 & pernum==5


// 2012 # 91011 person 5 is 4's child (assign to parent's family, father 6 mother 7 ) 
replace father=6 if year==2012 & serial==91011 & pernum==5
replace mother=7 if year==2012 & serial==91011 & pernum==5


// 2012 # 96470 person 7 is 6's child (assign to parent's family, father 1 mother 2 ) 
replace father=1 if year==2012 & serial==96470 & pernum==7
replace mother=2 if year==2012 & serial==96470 & pernum==7

// 2012 # 96570 person 9 is 3's child (assign to parent's family, father 2 mother 1 ) 
replace father=2 if year==2012 & serial==96570 & pernum==9
replace mother=1 if year==2012 & serial==96570 & pernum==9


// 2014 # 34020 person 7 is 2's child (assign to parent's (age 16) family, father 10 mother 1)
replace father=10 if year==2014 & serial==34020 & pernum==7
replace mother=1 if year==2014 & serial==34020 & pernum==7


// 2015 # 91540 person 6 is 5's child (assign to parent's family, father 1 mother 2)
replace father=1 if year==2015 & serial==91540 & pernum==6
replace mother=2 if year==2015 & serial==91540 & pernum==6

// 2016 # 44403 person 2 is 1's child (assign to parent's family, father 1 mother 6) 
replace father=1 if year==2016 & serial==44403 & pernum==2
replace mother=6 if year==2016 & serial==44403 & pernum==2

// 2016 # 75346 person 7 is 1's child (assign to parent's family, father 1 mother 6) 
replace father=1 if year==2016 & serial==75346 & pernum==7
replace mother=6 if year==2016 & serial==75346 & pernum==7


// 2017 # 82558 persons 7 and 11 are part of 4's family (but 4 has been dropped - no couple), so they are dropped as well 
drop if year==2017 & serial==82558 & (pernum==11 | pernum==7) 


// 2018 # 10478 person 2 is 1's child (assign to parent's family, father 1 mother 8)
replace father=1 if year==2018 & serial==10478 & pernum==2
replace mother=8 if year==2018 & serial==10478 & pernum==2


// 2019 # 78477 persons 5-7 are 3's children (assign to parent's family, father 3 mother 4)
replace father=3 if year==2019 & serial==78477 & (pernum==5 | pernum==6 | pernum==7)
replace mother=4 if year==2019 & serial==78477 & (pernum==5 | pernum==6 | pernum==7) 





// Now we can really uniquely identify all members of the same family //
// Remove the existing uniqfm so it can be rebuilt below from the current father/mother values //
drop uniqfm
// familyid = the father and mother values written side by side as text (concat), then converted back to a number //
// NOTE(review): the concatenation is not zero-padded, so e.g. father=1/mother=12 and father=11/mother=2 both give 112 -- //
// confirm that such a pair cannot occur within one household. destring also leaves familyid as a string if any value //
// is non-numeric, which would make the two gen statements below fail. //
egen familyid= concat(father mother)
destring familyid, replace
// Unique family identifier //
// serial*10000 leaves four digits for familyid -- presumably enough for two two-digit person numbers; verify //
gen double uniqfm=(year*1000000000)+(serial*10000)+familyid

// Unique couple identifier within the unique family: helps us identify the head of household and spouse later on //
// Same layout as uniqfm shifted one digit to the left, with the couples flag in the last digit //
gen double uniqcplid=(year*10000000000)+(serial*100000)+(familyid*10)+couples

// Person-level analysis file: everyone kept above, with family and couple identifiers //
save "dta\rawdata_ipumscps\maindata.dta", replace


